This automated reported is intended to serve as a template for a coverage and gaps analyses. It provides examples of the analysis necessary to identify populations in need who are not currently covered by humanitarian action and provide recommendations on how partners may best reach them. Coverage and gaps analyses should be used in tandem with concerted monitoring and evaluation (M&E) – opportunities where Information Management and M&E may reinforce each other are discussed below, in relevant sections.
Coverage and gaps analyses are key documents, but are also rarely taken into account during operational planning or referenced during revisions of major strategic documents, such as Humanitarian Response Plans (HRPs). Neither are they mentioned in OCHA’s HRP guidance.
Given that needs always grossly outweigh available funding, it still remains an industry-wide challenege to respond adequately to gaps in coverage and reallocate resources accordingly. Too often, once committed to a course of action, clusters and their humanitarian partners do not reexamine or reevaluate their interventions. None of the analyses in this document require great skill – they have all been performed by a single analyst; yet, this type of report remains exceedingly rare.
Code button is clicked.# reading and cleaning -- you really should break it into parts
ven1 <- read_csv("consolidation 191209 1636.csv") %>%
clean_names() %>%
# removing unused columns
select(-c(codigodeestablecimientoocentro, loc_id, hrp_sitre_p_indicator,
tipoderespuesta, comentarios, coordeadas_gps_x, coordeadas_gps_y,
fechade_inicio, fecha_previstade_finalizacion)) %>%
# renaming unwieldy columns
rename(ubicacion = comunidadonombredelestablecimiento_centro,
sector = sector_areade_responsabiliad,
beneficiarios_meta = beneficiarios_meta_numerodepersonas,
estatus = estatusdeprogramacion) %>%
# mutating the date to the right format
mutate(month = as.factor(recode(month,
`4` = "30/04/2019",
`5` = "31/05/2019",
`6` = "30/06/2019",
`7` = "31/07/2019",
`8` = "31/08/2019",
`9` = "30/09/2019",
`10` = "31/10/2019"))) %>%
mutate(month = as.Date(month %>% strptime(., format = "%d/%m/%Y"))) %>%
mutate(org_lider = coalesce(org_lider, org_implementadora)) %>%
# correcting sector names
mutate(sector = str_replace_all(sector, c(
"Agua_saneamiento_higiene" = "WASH",
"educacion" = "Educacion",
"Nutricion" = "Nutricion",
"protección_Niños_Niñas_Adolescentes" = "Proteccion_NNA",
"Protección_Niños_Niñas_Adolescentes" = "Proteccion_NNA",
"Protección_Violencia_Género" = "Proteccion_GBV"))) %>%
# renaming beneficiary disaggregation columns
rename(f_0_18 = f_18,
m_0_18 = m_18,
f_18plus = f_18_2,
m_18plus = m_18_2) %>%
mutate(estado = rm_accent(str_to_upper(estado)),
municipio = rm_accent(str_to_upper(municipio)),
parroquia = rm_accent(str_to_upper(parroquia)),
ubicacion = rm_accent(str_to_upper(ubicacion)),
actividad = rm_accent(str_to_upper(actividad)),
categoria = rm_accent(str_to_upper(categoriadeactividad))) %>%
# recoding the estatus column
mutate(estatus = str_replace_all(estatus,
c("En ejecucion" = "ejecucion",
"en ejecución" = "ejecucion",
"en Ejecución" = "ejecucion",
"En ejecución" = "ejecucion",
"En Ejecución" = "ejecucion",
"Enejecución" = "ejecucion",
"43741" = "ejecucion",
"finalizada" = "finalizada",
"Finalizada" = "finalizada",
"Planeada" = "planeada",
"planeada con financiamiento" = "planeada",
"planeada sin financiamiento" = "planeada"))) %>%
replace_na(list(estatus = "ejecucion")) %>%
# removing all planned activities
filter(estatus != "planeada") %>%
filter(str_detect(pcode3, "^VE")) %>% # decide if you want to do this here or later
select(-c(23:92))
# I'm kinda doubting the use of u_ben, ya I think take it out? since you're only using it once
# Am I just making these out of habit? I could make them inside the
# code chunk for parr, but maybe I can find some justification for their existence,
# maybe the disaggregations?
# Vaccination activities filtered out
u_ben <- ven1 %>%
pivot_longer(f_0_18:m_18plus, names_to = "desagregacion", values_to = "beneficiarios") %>%
filter(categoriadeactividad != "Vacunacion") %>%
filter(beneficiarios != 0) %>%
group_by(ubicacion, desagregacion) %>%
slice(which.max(beneficiarios)) %>%
ungroup()
act_ben <- ven1 %>%
pivot_longer(f_0_18:m_18plus, names_to = "desagregacion", values_to = "beneficiarios") %>%
filter(beneficiarios != 0) %>%
group_by(ubicacion, desagregacion, actividad) %>%
slice(which.max(beneficiarios)) %>%
ungroup()
# rbind(sum(u_ben$beneficiarios),
# sum(act_ben$beneficiarios),
# sum(u_ben$beneficiarios) - sum(act_ben$beneficiarios))
# I think this is a gigantic chunk -- cannot decide if I would rather have less things in the
# environment or if I want more readable chunks. The benefit here I guess is that if I want to change something, I just have to come to this chunk
parr <- u_ben %>%
group_by(pcode3) %>%
summarise(beneficiarios = sum(beneficiarios)) %>%
ungroup() %>%
# count of organisations per pcode3
left_join(act_ben %>%
# filter(categoria != "Vacunacion") %>%
group_by(pcode3) %>%
summarise(org_count = n_distinct(org_implementadora))) %>%
# getting beneficiary frequencies, sector count and maximum multi-sector beneficiaries
left_join(act_ben %>%
# vaccination filtered out
filter(categoriadeactividad != "Vacunacion") %>%
group_by(ubicacion, desagregacion, sector) %>%
slice(which.max(beneficiarios)) %>%
ungroup() %>%
group_by(ubicacion, sector, pcode3) %>%
pivot_wider(names_from = sector, values_from = beneficiarios) %>%
replace_na(list(Nutricion = 0, Educacion = 0, WASH = 0, Salud = 0,
Seguridad_Alimentaria = 0, Proteccion_NNA = 0,
Proteccion_General = 0, Proteccion_GBV = 0)) %>%
group_by(pcode3, desagregacion) %>%
summarise(nutricion_ben = sum(Nutricion),
proteccion_ben = sum(Proteccion_NNA + Proteccion_General + Proteccion_GBV),
wash_ben = sum(WASH),
salud_ben = sum(Salud),
educacion_ben = sum(Educacion),
sa_ben = sum(Seguridad_Alimentaria), .groups = "drop") %>%
mutate(ben_freq = nutricion_ben + proteccion_ben + wash_ben + salud_ben +
educacion_ben + sa_ben,
ben_max = pmax(nutricion_ben, proteccion_ben, wash_ben,
salud_ben, educacion_ben, sa_ben),
ms_ben_max = ifelse(ben_max >= ben_freq - ben_max,
ben_freq - ben_max,
ben_max)) %>%
group_by(pcode3) %>%
summarise(nutricion_ben = sum(nutricion_ben),
proteccion_ben = sum(proteccion_ben),
wash_ben = sum(wash_ben),
salud_ben = sum(salud_ben),
educacion_ben = sum(educacion_ben),
sa_ben = sum(sa_ben),
ben_freq = sum(ben_freq),
ben_max = sum(ben_max),
ms_ben_max = sum(ms_ben_max)) %>%
mutate(sector_count = rowSums(select(., ends_with("_ben")) != 0))) %>%
filter(str_detect(pcode3, "^VE")) %>%
# right_join to the census data
right_join(read_excel("census data 20191122.xlsx", sheet = "data") %>%
clean_names() %>%
# selecting variables and renaming them with select
select(estado, pcode1, municipio, pcode2, parroquia, pcode3,
fo = field_office,
poblacion_2019 = x_2019_poblacion_parroquial_total,
hogares_2011 = numero_de_hogares,
ham_2019_ambitos_ge,
percent_pobre = ham_2019_xx_pobreza_env_por_parroquia,
pob_pobre = ham_2019_xx_poblacion_pobre_por_parroquia,
poblacion_total_2011,
poblacion_infantil_menor_de_12_anos, poblacion_adolescentes_de_12_a_17_anos,
poblacion_de_18_anos_y_mas,
percent_urbana = poblacion_urbana_percent,
area_km2,
densidad_ppl_km2 = densidad_poblacional_ppl_km2,
matricula_2017_educacion_inicial, matricula_2017_educacion_primaria,
matricula_2017_educacion_media, razon_de_dependencia_total,
razon_de_dependencia_de_menores_de_15_anos,
percent_sin_agua_segura = x_abast_agua2_percent_sin_agua_segura,
percent_sin_saneamiento_mejorado =
x_saneamiento_percent_sin_saneamiento_mejorado,
percent_analfabeto = percent_poblacion_10_anos_y_mas_analfabeta,
promedio_de_personas_por_vivienda,
percent_hogares_jefatura_femenina = percent_de_hogares_con_jefatura_femenina,
percent_sin_servicio_electrico =
servicio_electrico_percent_no_tiene_servicio_electrico,
ham_2019_x_violencia_envelope, ham_2019_x_mortalidad_y_salud_envelope,
ham_2019_x_pobreza_envelope, promedio_de_edad) %>%
mutate(estado = rm_accent(str_to_upper(estado)), # just to make sure
municipio = rm_accent(str_to_upper(municipio)),
parroquia = rm_accent(str_to_upper(parroquia))) %>%
# creating new disaggregation variables
mutate(pob_menor_de_18 = (poblacion_infantil_menor_de_12_anos +
poblacion_adolescentes_de_12_a_17_anos) /poblacion_total_2011 *
poblacion_2019,
pob_18_y_mas = poblacion_de_18_anos_y_mas / poblacion_total_2011 * poblacion_2019,
hogares_2019 = hogares_2011 * poblacion_2019 / poblacion_total_2011,
matricula_total = matricula_2017_educacion_inicial +
matricula_2017_educacion_primaria +
matricula_2017_educacion_media) %>%
# dividing columns by 100 so that they're between 0 and 1
mutate_at(vars(percent_analfabeto, percent_sin_servicio_electrico,
percent_sin_agua_segura,
percent_sin_saneamiento_mejorado,
percent_hogares_jefatura_femenina, percent_urbana,
razon_de_dependencia_total), ~(. / 100)) %>%
# mutating new columns with populations
mutate(pob_analfabeto = percent_analfabeto * poblacion_2019,
pob_sin_agua_segura = percent_sin_agua_segura * poblacion_2019,
pob_sin_servicio_electrico = percent_sin_servicio_electrico * poblacion_2019,
pob_sin_saneamiento_mejorado = percent_sin_saneamiento_mejorado * poblacion_2019,
pob_urbana = percent_urbana * poblacion_2019) %>%
select(-c(matricula_2017_educacion_inicial, matricula_2017_educacion_primaria,
matricula_2017_educacion_media, poblacion_total_2011, hogares_2011,
poblacion_infantil_menor_de_12_anos, poblacion_adolescentes_de_12_a_17_anos,
poblacion_de_18_anos_y_mas)),
by = "pcode3") %>%
# mutating new variables and making sure NAs become 0s
mutate(beneficiarios = ifelse(is.na(beneficiarios), 0, beneficiarios),
org_count = ifelse(is.na(org_count), 0, org_count),
sector_count = ifelse(is.na(sector_count), 0, sector_count),
educacion_ben = ifelse(is.na(educacion_ben), 0, educacion_ben),
nutricion_ben = ifelse(is.na(nutricion_ben), 0, nutricion_ben),
proteccion_ben = ifelse(is.na(proteccion_ben), 0, proteccion_ben),
salud_ben = ifelse(is.na(salud_ben), 0, salud_ben),
sa_ben = ifelse(is.na(sa_ben), 0, sa_ben),
wash_ben = ifelse(is.na(wash_ben), 0, wash_ben),
ms_ben_max = ifelse(is.na(ms_ben_max), 0, ms_ben_max),
ben_freq = ifelse(is.na(ben_freq), 0, ben_freq),
not_reached = pob_pobre - beneficiarios,
coverage_percent = beneficiarios / poblacion_2019,
coverage_pobre_percent = beneficiarios / pob_pobre,
percent_total_ben = beneficiarios / sum(beneficiarios),
multisector_percent = ms_ben_max / ben_freq,
org_present = ifelse(beneficiarios > 0, TRUE, FALSE),
pob_pobre_score = rescale(pob_pobre, to = c(0,1)),
percent_pobre_score = rescale(percent_pobre, to = c(0,1)),
poverty_score = (pob_pobre_score + percent_pobre_score) / 2)
# taking a subset of parr to only get parrishes where the number of beneficiaries does not exceed the number of poor persons
parr0 <- parr %>%
filter(not_reached >= 1) %>%
mutate(gap_score = (rescale(not_reached, to = c(0,1)) + percent_pobre_score) / 2)
# parrishes with negative poor persons are recoded as "0" so they won't mess up the scale
# even though this means that their tooltips are dropped
gaps_map <- parr %>%
right_join(pcode3_shape, by = "pcode3") %>%
st_as_sf() %>%
mutate(not_reached = ifelse(not_reached < 0.1, 0, not_reached)) %>%
mutate(not_reached = round(not_reached, digits = 0)) %>%
mutate_at(vars(percent_pobre, percent_urbana), ~(round(., digits = 2))) %>%
ggplot() +
geom_sf(aes(fill = not_reached,
text = paste0(parroquia,",", "\n",
municipio, ",", "\n",
estado, "\n",
"not reached: ", not_reached, "\n",
"org count: ", org_count, "\n",
"poverty incidence: ", percent_pobre, "\n",
"percent urban: ", percent_urbana)),
size = 0.1) +
scale_fill_viridis_c(option = "turbo", trans = "log10") +
theme_void() +
theme(legend.title = element_text(size = 7),
legend.text = element_text(size = 7),
plot.title = element_text(size = 10)) +
labs(fill = "Poor persons \nnot reached") +
ggtitle("Map of parrishes by gaps in population reached")
# so are you saying that if I change the fill to viridis in the later plot, I can use hoveron = fill?
# no you can't.
ggplotly(gaps_map, tooltip = c("text")) %>%
layout(showlegend = TRUE, legend = list(font = list(size = 6))) %>%
style(hoveron = "fill") %>%
layout(title = list(text = paste0("Map of parrishes by number poor persons not reached",
"<br>",
"<sup>",
"mouse over for details; drag and click to select and zoom; double-click legend select/deselect","</sup>")))
Nationwide, 11,633,901 poor persons have not been reached by response activities – this means that 94.6% all the poor persons in the country have yet to be reached.
As a starting point, all 1109 parrishes have been split into three groups – over, where the number of unique beneficiaries reached exceeds the number of poor persons in that parrish; under, where the coverage is less than the number of poor persons; and no coverage, comprising a total of 530 parrishes, where no activities have occurred.
However, it should be noted that a total of 11,633,901 poor persons reside in the 530 parrishes that have not been reached, this is only 25% of the 11,170,010 poor persons not covered by response activities. This indicates that 1) there is much room to expand in the parrishes where we are already present and that 2) sparely populated, remote and, consequently, poorer parrishes have, so far, been left out of the response.
parr %>%
mutate(coverage_type = case_when(not_reached <= 0 ~ "over",
not_reached > 0 & beneficiarios >= 1 ~ "under",
beneficiarios == 0 ~ "no_coverage")) %>%
group_by(coverage_type) %>%
summarise(parroquias = n(),
beneficiarios = sum(beneficiarios),
not_reached = sum(not_reached),
avg_org_count = mean(org_count),
percent_pobre = (sum(pob_pobre)) / (sum(poblacion_2019)),
percent_urbana = (sum(pob_urbana)) / (sum(poblacion_2019)),
percent_sin_agua_segura = (sum(pob_sin_agua_segura)) / (sum(poblacion_2019)),
percent_sin_saneamiento_mejorado = (sum(pob_sin_saneamiento_mejorado)) /
(sum(poblacion_2019)),
avg_sector_count = mean(sector_count)) %>%
gather(key = variable, value = value, 2:ncol(.)) %>%
spread_(key = names(.)[1], value = 'value') %>%
relocate(no_coverage, .after = under) %>%
pander(big.mark = ",")
| variable | over | under | no_coverage |
|---|---|---|---|
| avg_org_count | 7.818 | 2.718 | 0 |
| avg_sector_count | 4 | 1.764 | 0 |
| beneficiarios | 658,812 | 658,257 | 0 |
| not_reached | -463,891 | 8,838,144 | 2,795,757 |
| parroquias | 11 | 568 | 530 |
| percent_pobre | 0.2029 | 0.3743 | 0.4602 |
| percent_sin_agua_segura | 0.02017 | 0.1318 | 0.2304 |
| percent_sin_saneamiento_mejorado | 0.01024 | 0.0697 | 0.1813 |
| percent_urbana | 0.9948 | 0.9302 | 0.6795 |
We note that the 11 parrishes in the over category are much less poor and much more urban despite having 50% of all beneficiaries. These parrishes are shown in the table below. And, as can be seen from not_reached, the number beneficiaries in the over category has greatly exceeded the number of poor persons.
Parrishes (admin level 3) will be the main administrative unit of reference. Unlike in the 5W cleaning and reporting document, where we focused on municipalities, greater precision is needed for a coverage and gaps analysis. Furthermore, we should always endeavour to maximise the usage of all available data.
The 11 parrishes below will largely be excluded in the remainder of this report as it is clear that no further resources should be allocated to them:
parr %>%
mutate(coverage_type = case_when(not_reached <= 0 ~ "over",
not_reached > 0 & beneficiarios >= 1 ~ "under",
beneficiarios == 0 ~ "not_reached")) %>%
filter(coverage_type == "over") %>%
select(estado, municipio, estado, parroquia, beneficiarios, pob_pobre) %>%
mutate(coverage_percent = beneficiarios / pob_pobre * 100) %>%
arrange(desc(beneficiarios)) %>%
pander(big.mark = ",")
| estado | municipio | parroquia | beneficiarios | pob_pobre | coverage_percent |
|---|---|---|---|---|---|
| DISTRITO CAPITAL | LIBERTADOR | ALTAGRACIA | 321,597 | 6,244 | 5,151 |
| MIRANDA | SUCRE | PETARE | 178,814 | 82,058 | 217.9 |
| BOLIVAR | CARONI | ONCE DE ABRIL | 69,332 | 54,118 | 128.1 |
| BOLIVAR | HERES | CATEDRAL | 32,177 | 14,336 | 224.4 |
| BOLIVAR | HERES | VISTA HERMOSA | 18,477 | 13,919 | 132.7 |
| MIRANDA | CHACAO | CHACAO | 13,780 | 5,481 | 251.4 |
| TACHIRA | ANDRES BELLO | CAPITAL CORDERO | 8,596 | 7,766 | 110.7 |
| MIRANDA | SUCRE | LEONCIO MARTINEZ | 6,834 | 4,792 | 142.6 |
| CARABOBO | VALENCIA | URBANA CANDELARIA | 5,043 | 4,589 | 109.9 |
| TACHIRA | BOLIVAR | GENERAL JUAN VICENTE GOMEZ | 2,253 | 362.1 | 622.3 |
| DELTA AMACURO | TUCUPITA | SAN JOSE | 1,909 | 1,256 | 152 |
Though it should be mentioned that it is likely that partners have reported activities which occurred in other parts of the capital in Altagracia, as the total number of benefificaries reached in the whole of Distrito Capital is only # A tibble: 1 x 1, beneficiarios,
mouse over plot for more details
# ref for printing state_ord. I'm really not sure how to extract all the variables as a list
# parr0 %>%
# group_by(estado) %>%
# summarise(not_reached = sum(not_reached)) %>%
# arrange(desc(not_reached)) %>%
# select(estado) %>% as.list(as.data.frame(t(.)))
state_ord <- c("ZULIA", "LARA", "CARABOBO", "MIRANDA", "ANZOATEGUI", "ARAGUA", "BOLIVAR",
"PORTUGUESA", "SUCRE", "GUARICO", "FALCON", "MONAGAS", "BARINAS", "MERIDA",
"TACHIRA", "TRUJILLO", "YARACUY", "APURE", "DISTRITO CAPITAL", "NUEVA ESPARTA",
"COJEDES", "VARGAS", "DELTA AMACURO", "AMAZONAS")
stack_text <- parr0 %>%
group_by(estado) %>%
summarise(beneficiarios = sum(beneficiarios),
total = sum(pob_pobre)) %>%
mutate(percent_reached = round(beneficiarios / total * 100, digits = 2)) %>%
arrange(desc(total))
state_stack <- parr0 %>%
select(estado, beneficiarios, not_reached) %>%
group_by(estado) %>%
summarise(beneficiarios = round(sum(beneficiarios), digits = 0),
not_reached = round(sum(not_reached), digits = 0)) %>%
pivot_longer(c(beneficiarios,not_reached),
names_to = "pob_type", values_to = "total") %>%
ggplot(aes(x = estado, y = total)) +
geom_col(aes(fill = pob_type)) +
scale_y_continuous(label = comma) +
scale_x_discrete(limits = state_ord) +
scale_fill_manual(values = c("coral", "cornflowerblue")) +
geom_text(data = stack_text, aes(y = 20000,
label = percent_reached), size = 2) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 5),
axis.text.y = element_text(size = 5),
axis.title.y = element_text(size = 8),
plot.title = element_text(size = 11)) +
xlab("") + ylab("Poor persons") +
labs(fill = "",
title = "Barplot of poor persons by state")
ggplotly(state_stack) %>%
layout(legend = list(font = list(size = 6))) %>%
config(displayModeBar = FALSE)
The states where the highest proportions of poor persons have been reached (listed at the bottom of each bar) are Distrito Capital, Tachira, Bolivar, Delta Amacuro, Miranda and Zulia. Carabobo has the lowest percentage of its poor population covered. On average, after the exclusion of the top 11 parrishes, 10.5% of poor persons have been reached countrywide.
Whilst there are many poor persons yet to be reached in states where we have relatively high coverage, there is a need to ensure that our operational footprint and the consequent resources allocated are equitable – the crisis in Venezuela is nationwide and unlike an earthquake or a typhoon where there is an epicentre or a stormpath, there is no programmatic rationale to only focus on a few areas.
Let us now move down to a lower level of granularity as state-level analysis is still quite superficial:
From the scatterplot below – where each point is a parrish – we see that there is great variation in the number of poor persons not covered (size) as well as how concentrated they are in a given parrish (y-axis, poverty incidence); both these factors weigh heavily in programming strategies as well as in the ease of beneficiary selection.
We also observe that the parrishes with the greatest numbers of not covered (found at poor persons: 10,000-100,000; poverty incidence: 0.25-0.50) have a much higher than average number of organisations present (colour). This means that operational barriers are much lower in accessing these populations than the parrishes in light blue found in the middle of the plot.
parrplot <- parr0 %>%
mutate_at(vars(pob_pobre, not_reached, org_count), ~(round(.))) %>%
mutate(percent_pobre = round(percent_pobre, digits = 2))%>%
ggplot(aes(x = not_reached, y = percent_pobre,
colour = org_count,
text = paste0(parroquia, ", ", estado))) +
geom_jitter(aes(size = not_reached), alpha = 0.75) +
scale_colour_gradientn(
colours = c("cornflowerblue", "tomato", "firebrick")) +
scale_x_continuous(trans = "log10", labels = comma) +
scale_size_continuous(range = c(0.3, 5)) +
xlab("Not covered poor") + ylab("Poverty incidence") +
labs(colour = "Number of \norganisations",
title = "Scatterplot of parrishes by poor persons not covered and poverty incidence") +
theme(legend.title = element_text(size = 7),
legend.text = element_text(size = 7),
plot.title = element_text(size = 11),
axis.title = element_text(size = 8.5))
ggplotly(parrplot, tooltip = c("y", "x", "size", "text", "colour")) %>%
layout(showlegend = TRUE, legend = list(font = list(size = 7))) %>%
config(displayModeBar = FALSE) %>%
layout(title = list(text = paste0(
"Scatterplot of parrishes by number poor persons not reached and poverty incidence",
"<br>",
"<sup>",
"size: number of poor persons not reached; colour: number of organisations present; drag and click to select and zoom","</sup>")))
Just because two Clusters operate in the same area do not mean their beneficiaries coincide. As an estimate, we calculated a theoretical maximum number of multi-sector beneficiaries per parrish, expressed below as multi_sector_ben (exact calculation and explanation in the code chunk below). Parrishes have then been split into groups based on the number of sectors present within them:
# calculation for multi-sector beneficiaries.
# Basically, beneficiaries per parrish are aggregated into
# sector subtotals and a beneficiary frequency total.
# The maximum value of the sector subtotals is compared against the beneficiary frequency total,
# if the maximum value is equal to the frequency total, then there is only one sector,
# if the maximum value is less than the frequency total,
# the difference between the two (or the sum of all other sector subtotals)
# becomes the theoretical maximum number of multisector beneficiaries.
# Performing this calculation at admin level 3 makes sense as a parrish is small enough
# that there it is realistic to assume that overlaps in beneficiaries between sectors exist --
# i.e. that females under 18 in a parrish who are beneficiaries of nutrition
# and females under 18 in that same parrish who are beneficiaries of WASH are the same people.
# Although I do feel this calculation to be very charitable
# We can't really do much more unless there is a beneficiary register.
# The real number of multisector beneficiaries is likely MUCH LOWER
# but that can only be verified through sampled large-scale post-distribution/post-intervention monitoring,
# which is extremely rare.
# I actually could have raised this with the third-party monitors that UNICEF,
# so it's an oversight on my part as well.
act_ben %>%
# vaccination filtered out
filter(categoriadeactividad != "Vacunacion") %>%
group_by(ubicacion, desagregacion, sector) %>%
slice(which.max(beneficiarios)) %>%
ungroup() %>%
group_by(ubicacion, sector, pcode3) %>%
pivot_wider(names_from = sector, values_from = beneficiarios) %>%
replace_na(list(Nutricion = 0, Educacion = 0, WASH = 0, Salud = 0,
Seguridad_Alimentaria = 0, Proteccion_NNA = 0,
Proteccion_General = 0, Proteccion_GBV = 0)) %>%
group_by(pcode3, desagregacion) %>%
summarise(nutricion_ben = sum(Nutricion),
proteccion_ben = sum(Proteccion_NNA + Proteccion_General + Proteccion_GBV),
wash_ben = sum(WASH),
salud_ben = sum(Salud),
educacion_ben = sum(Educacion),
sa_ben = sum(Seguridad_Alimentaria), .groups = "drop") %>%
mutate(ben_freq = nutricion_ben + proteccion_ben + wash_ben + salud_ben +
educacion_ben + sa_ben,
ben_max = pmax(nutricion_ben, proteccion_ben, wash_ben,
salud_ben, educacion_ben, sa_ben),
ms_ben_max = ifelse(ben_max >= ben_freq - ben_max,
ben_freq - ben_max,
ben_max)) %>%
group_by(pcode3) %>%
summarise(nutricion_ben = sum(nutricion_ben),
proteccion_ben = sum(proteccion_ben),
wash_ben = sum(wash_ben),
salud_ben = sum(salud_ben),
educacion_ben = sum(educacion_ben),
sa_ben = sum(sa_ben),
ben_freq = sum(ben_freq),
ben_max = sum(ben_max),
ms_ben_max = sum(ms_ben_max), .groups = "drop") %>%
mutate(sector_count = rowSums(select(., ends_with("_ben")) != 0)) %>%
group_by(sector_count) %>%
# filtering out the parrishes with oversubscription
filter(pcode3 %out% c("VE010101", "VE070104", "VE070502", "VE070506", "VE081401", "VE100401",
"VE150701", "VE151901", "VE151905", "VE200101", "VE200403")) %>%
summarise(parroquias = n(),
multi_sector_ben = sum(ms_ben_max),
one_sector_ben = sum(ben_freq) - sum(ms_ben_max),
multisector_percent = round(sum(ms_ben_max) / sum(ben_freq) * 100, digits = 2),
.groups = "drop") %>%
relocate(multisector_percent, .after = one_sector_ben) %>%
pander()
| sector_count | parroquias | multi_sector_ben | one_sector_ben | multisector_percent |
|---|---|---|---|---|
| 1 | 348 | 0 | 83,848 | 0 |
| 2 | 102 | 16,336 | 89,572 | 15.42 |
| 3 | 60 | 21,887 | 95,955 | 18.57 |
| 4 | 40 | 64,445 | 153,207 | 29.61 |
| 5 | 17 | 34,333 | 93,604 | 26.84 |
| 6 | 5 | 14,072 | 22,881 | 38.08 |
Overall, the results are not encouraging – only 21.9% of all beneficiaries have received multi-sector support. When vaccinations are included, the percentage drops to 8.7%. But we will exclude vaccinations for this analysis as their footprint is determined by government priorities, which may or may not align with the humanitarian imperative; furthermore, the government was not able to provide vaccination records at the parrish level, many times defaulting to municipal or state-level reporting.
As a note number of organisations present in a parrish is highly correlated with the number of sectors present there (a correlation coefficient of 0.913). A scatterplot would be repetitive and very similar to the one in section 3b and not deepen our understanding of the actual coverage.
However, for agencies truly unable to expand outside their current footprints, there are still many beneficiaires who are not covered or have only been reached with the interventions of one sector.
ms_scatter <- parr0 %>%
group_by(estado) %>%
summarise(multi_sector_ben = sum(ms_ben_max),
one_sector_ben = round(sum(ben_freq) - sum(ms_ben_max), digits = 0),
ben_freq = round(sum(ben_freq), digits = 0)) %>%
mutate(multisector_percent = round(multi_sector_ben / ben_freq * 100, digits = 1)) %>%
ggplot(aes(x = ben_freq,
y = multisector_percent)) +
geom_point(aes(size = one_sector_ben),
alpha = 0.8, colour = "coral") +
geom_text(aes(label = estado), size = 1.5) +
scale_x_continuous(trans = "log10", labels = comma) +
scale_size_continuous(range = c(0.3, 10)) +
xlab("Beneficiary frequencies") + ylab("Percentage received multi-sector support") +
labs(title = "Scatterplot of states by beneficiary frequencies and multi-sector coverage of beneficiaries")+
theme(plot.title = element_text(size = 11),
axis.title = element_text(size = 8.5))
ggplotly(ms_scatter, tooltip = c("x", "y", "text", "size")) %>%
layout(legend = list(font = list(size = 6))) %>%
config(displayModeBar = FALSE) %>%
layout(title = list(text = paste0(
"Scatterplot of states by beneficiary frequencies and multi-sector coverage of beneficiaries",
"<br>",
"<sup>",
"size: beneficiaries who only received support from one sector; mouse over for details","</sup>")))
As the leader of the Education, Nutrition, WASH and Child Protection Clusters, UNICEF supported activities reaching 95% of all beneficiaries. Meaning that this the low percentage of multi-sector support could be resolved almost entirely by better inter-section coordination and better programmatic oversight within UNICEF. In fact, we note the seeds of this behaviour by the higher rates of multi-sector coverage in the states where UNICEF has offices (Bolivar, Tachira, Zulia and Distrito Capital).
Additionally, 87% of all beneficiaries are from the states of Distrito Capital, Miranda, Tachira, Bolivar and Zulia, indicating that there is significant convergence in the geographical reaches of individual implementing partners.
For those looking for specific parrishes, there is a reference table at the end of this document.
Before we examine each cluster in more detail, here is first an overview of the geographic reach of each cluster:
act_ben %>%
filter(sector != "Seguridad_Alimentaria" & categoria != "VACUNACION") %>%
mutate(sector = ifelse(str_detect(sector, "Proteccion_GBV|Proteccion_General|Proteccion_NNA"),
"Proteccion", sector)) %>%
# mutate(sector = ifelse(categoria == "VACUNACION", "Vacunacion", sector))
# we're not going to look at vaccination in this report
group_by(sector) %>%
summarise(parrishes = n_distinct(pcode3),
beneficiary_frequencies = sum(beneficiarios)) %>%
rename(cluster = sector) %>%
pander(caption = "Cluster summary, vaccination exluded", big.mark = ",", style = "rmarkdown",
justify = c("left", "right", "right"))
| cluster | parrishes | beneficiary_frequencies |
|---|---|---|
| Educacion | 186 | 624,908 |
| Nutricion | 497 | 352,248 |
| Proteccion | 151 | 183,014 |
| Salud | 62 | 54,800 |
| WASH | 126 | 809,202 |
The following tables summarise the number of parrishes and beneficiary frequencies reached through the various combinations of different clusters – we use frequencies here, as each individual might have benefitted from multiple combinations of clusters. Each subsection lists the possible pairings of a cluster:
combination refers to the various cluster-wise pairs that exist;parrishes indicates the number of parrishes each combination is present in;clust-freq shows the number of beneficiary frequencies reached by that cluster%_subtotal indicates the percentage each combination forms of the subtotal of cluster beneficiary frequenciespair_sum shows the total number of beneficiary frequencies in that pair i.e. the pair_sum for edu_nut would be the sum of education and nutrition beneficiaries.%_ms_max shows the maximum percentage of multisector beneficiaries of each pair i.e. if the pair education-nutrition has 10 education beneficiaries and 30 nutrition beneficiaries, the maximum number of beneficiaries which received support from both sectors is 10, resulting in a %_ms_max of 25%. But, as mentioned in the notes for section 4a, this is a theoretical maximum and the actual level of coincidence is likely much lower. # creation of reference df for the cluster combinations
clust_com <- parr %>%
filter(ben_freq != 0) %>%
select(pcode3, ben_freq, educacion_ben, nutricion_ben, salud_ben, wash_ben, proteccion_ben) %>%
# mutate a new column for each combination of sectors -- if edu is the first cluster in the combination,
# only education beneficiaries will be used to fill values in the column
mutate(edu_alone = ifelse(educacion_ben == ben_freq, educacion_ben, 0),
edu_nut = ifelse(educacion_ben > 0 & nutricion_ben > 0, educacion_ben, 0),
edu_sal = ifelse(educacion_ben > 0 & salud_ben > 0, educacion_ben, 0),
edu_wash = ifelse(educacion_ben > 0 & wash_ben > 0, educacion_ben, 0),
edu_prot = ifelse(educacion_ben > 0 & proteccion_ben > 0, educacion_ben, 0),
nut_alone = ifelse(nutricion_ben == ben_freq, nutricion_ben, 0),
nut_edu = ifelse(educacion_ben > 0 & nutricion_ben > 0, nutricion_ben, 0),
nut_sal = ifelse(nutricion_ben > 0 & salud_ben > 0, nutricion_ben, 0),
nut_wash = ifelse(nutricion_ben > 0 & wash_ben > 0, nutricion_ben, 0),
nut_prot = ifelse(nutricion_ben > 0 & proteccion_ben > 0, nutricion_ben, 0),
sal_alone = ifelse(salud_ben == ben_freq, salud_ben, 0),
sal_edu = ifelse(educacion_ben > 0 & salud_ben > 0, salud_ben, 0),
sal_nut = ifelse(nutricion_ben > 0 & salud_ben > 0, salud_ben, 0),
sal_wash = ifelse(salud_ben > 0 & wash_ben > 0, salud_ben, 0),
sal_prot = ifelse(proteccion_ben > 0 & salud_ben > 0, salud_ben, 0),
wash_alone = ifelse(wash_ben == ben_freq, wash_ben, 0),
wash_edu = ifelse(educacion_ben > 0 & wash_ben > 0, wash_ben, 0),
wash_nut = ifelse(nutricion_ben > 0 & wash_ben > 0, wash_ben, 0),
wash_sal = ifelse(salud_ben > 0 & wash_ben > 0, wash_ben, 0),
wash_prot = ifelse(wash_ben > 0 & proteccion_ben > 0, wash_ben, 0),
prot_alone = ifelse(proteccion_ben == ben_freq, proteccion_ben, 0),
prot_edu = ifelse(educacion_ben > 0 & proteccion_ben > 0, proteccion_ben, 0),
prot_nut = ifelse(nutricion_ben > 0 & proteccion_ben > 0, proteccion_ben, 0),
prot_sal = ifelse(proteccion_ben > 0 & salud_ben > 0, proteccion_ben, 0),
prot_wash = ifelse(wash_ben > 0 & proteccion_ben > 0, proteccion_ben, 0)) %>%
# pivot_longer to the clust_freq column
pivot_longer(names_to = "combination", values_to = "clust_freq", 8:32) %>%
# mutating new cluster column for filtering later
mutate(cluster = case_when(str_detect(combination, "^edu") ~ "educacion",
str_detect(combination, "^nut") ~ "nutricion",
str_detect(combination, "^sal") ~ "salud",
str_detect(combination, "^wash") ~ "wash",
str_detect(combination, "^prot") ~ "proteccion")) %>%
# calculating the sum of frequencies in each pair
mutate(pair_freq =
case_when(
str_detect(combination, "edu_nut|nut_edu") ~ educacion_ben + nutricion_ben,
str_detect(combination, "edu_sal|sal_edu") ~ educacion_ben + salud_ben,
str_detect(combination, "edu_wash|wash_edu") ~ educacion_ben + wash_ben,
str_detect(combination, "edu_prot|prot_edu") ~ educacion_ben + proteccion_ben,
str_detect(combination, "nut_sal|sal_nut") ~ nutricion_ben + salud_ben,
str_detect(combination, "nut_wash|wash_nut") ~ nutricion_ben + wash_ben,
str_detect(combination, "nut_prot|prot_nut") ~ nutricion_ben + proteccion_ben,
str_detect(combination, "sal_wash|wash_sal") ~ salud_ben + wash_ben,
str_detect(combination, "sal_prot|prot_sal") ~ salud_ben + proteccion_ben,
str_detect(combination, "prot_wash|wash_prot") ~ proteccion_ben + wash_ben,
TRUE ~ NA_real_)) %>%
mutate(pair_freq = ifelse(str_detect(combination, "alone$"), 0, pair_freq)) %>%
select(pcode3, ben_freq, combination, cluster, clust_freq, pair_freq)
clust_com %>%
filter(cluster == "educacion" & clust_freq != 0) %>%
group_by(combination) %>%
summarise(parrishes = n(),
clust_freq = sum(clust_freq),
pair_sum = sum(pair_freq)) %>%
mutate(ms_max = round(pmin(clust_freq, abs(clust_freq - pair_sum)), digits = 1),
`%_ms_max` = round(ms_max / pair_sum * 100, digits = 1),
`%_ms_max` = ifelse(is.infinite(`%_ms_max`), 0, `%_ms_max`)) %>%
pander(caption = "Education cluster combinations", big.mark = ",", style = "rmarkdown",
justify = c("left", "right", "right", "right", "right", "right"))
| combination | parrishes | clust_freq | pair_sum | ms_max | %_ms_max |
|---|---|---|---|---|---|
| edu_alone | 33 | 26,570 | 0 | 26,570 | 0 |
| edu_nut | 132 | 241,390 | 306,424 | 65,034 | 21.2 |
| edu_prot | 81 | 183,820 | 296,182 | 112,362 | 37.9 |
| edu_sal | 38 | 82,489 | 116,730 | 34,241 | 29.3 |
| edu_wash | 83 | 204,964 | 911,894 | 204,964 | 22.5 |
The most common pairing for Education was with the Nutrition cluster – they coincide in 132 parrishes. However, there are no concrete programmatic links programmatically between the two, with Nutrition focusing on persons older or younger than the majority of Education’s beneficiaries. However, Protection and WASH both have explicit programmatic links (in the logframe) with Education and 81 and 83 parrishes respectively. A fruitful avenue of investigation would be how many beneficiaries of Education also benefitted from Protection interventions and how close it is to the 37.9% theoretical maximum.
clust_com %>%
filter(cluster == "nutricion" & clust_freq != 0) %>%
group_by(combination) %>%
summarise(parrishes = n(),
clust_freq = sum(clust_freq),
pair_sum = sum(pair_freq)) %>%
mutate(`%_subtotal` = round(clust_freq / sum(clust_freq) * 100, digits = 1),
`%_ms_max` = round(pmin(clust_freq, abs(clust_freq - pair_sum)) / pair_sum * 100, digits = 1),
`%_ms_max` = ifelse(is.infinite(`%_ms_max`), 0, `%_ms_max`)) %>%
relocate(`%_subtotal`, .after = clust_freq) %>%
pander(caption = "Nutrition cluster combinations", big.mark = ",", style = "rmarkdown",
justify = c("left", "right", "right", "right", "right", "right"))
| combination | parrishes | clust_freq | %_subtotal | pair_sum | %_ms_max |
|---|---|---|---|---|---|
| nut_alone | 282 | 48,576 | 19.3 | 0 | 0 |
| nut_edu | 132 | 65,034 | 25.9 | 306,424 | 21.2 |
| nut_prot | 125 | 57,871 | 23 | 203,461 | 28.4 |
| nut_sal | 48 | 25,875 | 10.3 | 62,717 | 41.3 |
| nut_wash | 97 | 53,753 | 21.4 | 758,861 | 7.1 |
Nutrition operates alone in 282 parrishes out of the 497 that it operates in, this is the most out of any of the other clusters – it is necessary to evaluate the extent to which other clusters can make use of the footholds established by Nutrition. In this light, it seems that its high level of coincidence with Education was due more to its wide operational presence rather than due to concerted coordination. The highest possibility of overlap is with Health, due to 1) their respective beneficiary frequencies not differing as much as with the other clusters and 2) the numerous programmatic links between the two. However, has the lowest number of beneficiaries in parrishes wherre health is also present.
clust_com %>%
filter(cluster == "salud" & clust_freq != 0) %>%
group_by(combination) %>%
summarise(parrishes = n(),
clust_freq = sum(clust_freq),
pair_sum = sum(pair_freq)) %>%
mutate(`%_subtotal` = round(clust_freq / sum(clust_freq) * 100, digits = 1),
`%_ms_max` = round(pmin(clust_freq, abs(clust_freq - pair_sum)) / pair_sum * 100, digits = 1),
`%_ms_max` = ifelse(is.infinite(`%_ms_max`), 0, `%_ms_max`)) %>%
relocate(`%_subtotal`, .after = clust_freq) %>%
pander(caption = "Health cluster combinations, excluding vaccinations",
big.mark = ",", style = "rmarkdown",
justify = c("left", "right", "right", "right", "right", "right"))
| combination | parrishes | clust_freq | %_subtotal | pair_sum | %_ms_max |
|---|---|---|---|---|---|
| sal_alone | 3 | 142 | 0.1 | 0 | 0 |
| sal_edu | 38 | 34,241 | 24.8 | 116,730 | 29.3 |
| sal_nut | 48 | 36,842 | 26.7 | 62,717 | 41.3 |
| sal_prot | 34 | 34,339 | 24.9 | 85,516 | 40.2 |
| sal_wash | 32 | 32,372 | 23.5 | 331,648 | 9.8 |
Without vaccination interventions, health has the smallest footprint of any of the clusters, covering 62 parrishes and 54800 beneficiary frequencies. It coincides the most with Nutrition and Protection and has numerous programmatic opportunities to coordinate with both at the intervention level with its focus on obstetric, antenatal and neonatal care as well as with HIV diagnosis and treatment.
clust_com %>%
filter(cluster == "wash" & clust_freq != 0) %>%
group_by(combination) %>%
summarise(parrishes = n(),
clust_freq = sum(clust_freq),
pair_sum = sum(pair_freq)) %>%
mutate(`%_subtotal` = round(clust_freq / sum(clust_freq) * 100, digits = 1),
`%_ms_max` = round(pmin(clust_freq, abs(clust_freq - pair_sum)) / pair_sum * 100, digits = 1),
`%_ms_max` = ifelse(is.infinite(`%_ms_max`), 0, `%_ms_max`)) %>%
relocate(`%_subtotal`, .after = clust_freq) %>%
pander(caption = "WASH cluster combinations", big.mark = ",", style = "rmarkdown",
justify = c("left", "right", "right", "right", "right", "right"))
| combination | parrishes | clust_freq | %_subtotal | pair_sum | %_ms_max |
|---|---|---|---|---|---|
| wash_alone | 12 | 5,663 | 0.2 | 0 | 0 |
| wash_edu | 83 | 706,930 | 29.4 | 911,894 | 22.5 |
| wash_nut | 97 | 705,108 | 29.3 | 758,861 | 7.1 |
| wash_prot | 67 | 689,628 | 28.7 | 798,852 | 13.7 |
| wash_sal | 32 | 299,276 | 12.4 | 331,648 | 9.8 |
Almost none of WASH’s beneficiary frequencies occurred in parrishes where no other clusters were present. Its great reach and blanket coverage (especially water supply and other community-level activities) mean that other clusters operating in the same areas as WASH are “guaranteed” to reach beneficiaries with multi-sector programming – the challenges being 1. the intentionality of the multi-sector coverage and 2. matching the scale of WASH activities. WASH has excellent programmatic overlap with all other clusters.
clust_com %>%
filter(cluster == "proteccion" & clust_freq != 0) %>%
group_by(combination) %>%
summarise(parrishes = n(),
clust_freq = sum(clust_freq),
pair_sum = sum(pair_freq)) %>%
mutate(`%_subtotal` = round(clust_freq / sum(clust_freq) * 100, digits = 1),
`%_ms_max` = round(pmin(clust_freq, abs(clust_freq - pair_sum)) / pair_sum * 100, digits = 1),
`%_ms_max` = ifelse(is.infinite(`%_ms_max`), 0, `%_ms_max`)) %>%
relocate(`%_subtotal`, .after = clust_freq) %>%
pander(caption = "Protection cluster combinations", big.mark = ",", style = "rmarkdown",
justify = c("left", "right", "right", "right", "right", "right"))
| combination | parrishes | clust_freq | %_subtotal | pair_sum | %_ms_max |
|---|---|---|---|---|---|
| prot_alone | 15 | 2,796 | 0.7 | 0 | 0 |
| prot_edu | 81 | 112,362 | 26.7 | 296,182 | 37.9 |
| prot_nut | 125 | 145,590 | 34.6 | 203,461 | 28.4 |
| prot_sal | 34 | 51,177 | 12.2 | 85,516 | 40.2 |
| prot_wash | 67 | 109,224 | 25.9 | 798,852 | 13.7 |
Like WASH and health, Protection has very limited beneficiary frequencies in parrishes where it operates alone. Protection coincides the most with Nutrition – this should serve as an impulse for the creation of referral pathways between the two since both carry out screening activities; additionally, both manage some form of beneficiary-level database. This cluster has the most explicit progammatic links to Education.
To close, the state of multi-sector programming is poor. There is little intentionality in which areas have multi-sector coverage and which areas do not – multi-sector links do exist at the activity level, but this is a poor approximation of developing a programme based on the specific needs of a target area. We see much better multi-sector coverage in the areas where UNICEF has offices, indicating that field offices should be afforded greater primacy in dictating cluster footprints in their respective areas.
To aid prioritisation of parrishes – that is, to determine where we should be working – it is necessary to split them up into more easily digestible groups – we will use decision trees to do this. A prioritisation score is also another commonly-used prioritisation tool, but, as we will see, collapsing a number of variables down into one score is not always helpful.
To understand how a decision tree functions, let us construct one to predict whether or not there is a humanitarian agency present in a parrish (org_present – this is our dependent variable). We supply the model our basket of indicators from which it will construct a model to predict our dependent variable – unhide the code below to see the full model – but we mostly fed the model the Census indicators. The decision tree printed below is the result:
# just to show the decision tree of how partners seem to have chosen locations.
# using full parr dataset for the tree
# no doubt there are other factors, but this is the data I have --
# looking at specific partner characteristics would be interesting.
set.seed(3000)
tree2 <- parr %>%
rpart(org_present ~ percent_pobre + percent_urbana + densidad_ppl_km2 +
razon_de_dependencia_de_menores_de_15_anos + razon_de_dependencia_total +
percent_sin_agua_segura + percent_sin_saneamiento_mejorado +
percent_sin_servicio_electrico + percent_analfabeto + percent_hogares_jefatura_femenina,
promedio_de_personas_por_vivienda, data = ., minbucket = 100)
fancyRpartPlot(tree2, digits = -3, sub = "", palettes = "Blues", type = 2)
To understand the plot above, all parrishes have been split into four groups (the terminal nodes at the bottom marked 4, 5, 6 and 7) based on the percentage of parrishes in each node where humanitarian agencies are present. Each bubble has three figures – for instance, the root, at the top, and marked [1], shows that on average, 0.524 or 52.4% of all parrishes have humanitarian agencies present in them. The next numbers, “n = 1109” shows that 1109 parrishes are in that group and next to it is the percentage of parrishes it contains, which, since it is the root, is 100%.
We see that 7 (in dark blue) the node with the highest concentration of parrishes with agencies present (82.8%), consists of parrishes more than 79.4% urbanised and denser than 187 ppl/km2. And 4, the node with the lowest concentration of parrishes with agencies (22.6%) is less than 79.4% urbanised and less than 21.8% urbanised.
Perhaps it is understandable that the most heavily populated parrishes have greater concentrations of organisations, though population density and urban population are both negatively correlated with poverty incidence. And this is, of course, not to imply that this actually depicts partners’ decision-making process, just that these are the factors towards which we, as a response, are predisposed.
The largest determinants of the number of beneficiaries reached per parrish are population density and percentage urban, as beneficiary numbers tend to scale in line with larger populations.
Several trees were built and trialled to split parrishes into targetting groups. As mentioned, the independent variables come from a pool of indicators from the census dataset, with some originating from the 2019 UNICEF Municipal Prioritisation Tool, which was a Principal Components Analysis of key variables related to poverty, health and mortality and violence and insecurity. After numerous iterations, tree3 was chosen and it splits parrishes into groups according to the:
To see the specific variables and formulae used for each of the major iterations, as well as additional notes on the development and application of decision trees, unhide the source code below.
# As opposed to a prioritisation score -- typically the weighted average of several demographic and socioeconomic indicators -- a tree is much better at accounting for the variations across geographic areas. A partner might not have the capacity to work outside of urban areas or another might have specific geographic biases and decision trees are a good tool to make the best possible targetting decisions within one's constraints.
# With that in mind, tree3 was developed to aid future prioritisation. The independent variable it strives to predict is the poverty score, which, as mentioned, is just the rescaled average of number of poor persons and poverty incidence. The performance of tree3 was considered superior to both tree1 (whose indendepent variable is just the absolute number of poor persons) and tree4 due to its ability clearly distinguish its groups of parrishes and because it is not dependent on gaps data -- meaning it will not shift when the 5Ws are updated.
set.seed(3000)
# number of not covered poor persons
tree1 <- parr0 %>%
rpart(not_reached ~ estado + percent_pobre + percent_urbana +
densidad_ppl_km2 + razon_de_dependencia_de_menores_de_15_anos +
razon_de_dependencia_total +
percent_sin_agua_segura + percent_sin_saneamiento_mejorado +
percent_sin_servicio_electrico + percent_analfabeto + percent_hogares_jefatura_femenina,
promedio_de_personas_por_vivienda, data = ., cp = 0.038)
# tree based on poverty_score
tree3 <- parr0 %>%
rpart(poverty_score ~ estado + percent_urbana + densidad_ppl_km2 +
razon_de_dependencia_de_menores_de_15_anos + razon_de_dependencia_total +
percent_sin_agua_segura + percent_sin_saneamiento_mejorado +
percent_sin_servicio_electrico + percent_analfabeto + percent_hogares_jefatura_femenina,
promedio_de_personas_por_vivienda, data = ., cp = 0.044)
# tree based on gap score -- let's not use this as tree3 is more stable and will not change based on new 5W data
# tree4 <- parr0 %>%
# rpart(gap_score ~ estado + percent_urbana + densidad_ppl_km2 +
# razon_de_dependencia_de_menores_de_15_anos + razon_de_dependencia_total +
# percent_sin_agua_segura + percent_sin_saneamiento_mejorado +
# percent_sin_servicio_electrico + percent_analfabeto + percent_hogares_jefatura_femenina,
# promedio_de_personas_por_vivienda, data = ., cp = 0.045)
# plotcp(tree3)
# printcp(tree3)
# adding tree1 and tree3 rules to the dataset
parr0 <- parr0 %>%
mutate(rule1 = row.names(tree1$frame)[tree1$where]) %>%
left_join(rpart.rules.table(tree1) %>%
filter(Leaf == TRUE) %>%
rename(rule1 = Rule) %>%
group_by(rule1) %>%
summarise(subrules1 = paste(Subrule, collapse = ","))) %>%
mutate(rule3 = row.names(tree3$frame)[tree3$where]) %>%
left_join(rpart.rules.table(tree3) %>%
filter(Leaf == TRUE) %>%
rename(rule3 = Rule) %>%
group_by(rule3) %>%
summarise(subrules3 = paste(Subrule, collapse = ",")))
Below is a plot of tree3 – the 1098 parrishes where the number of beneficiaries does not exceed the number of poor persons (corresponding to the under and no coverage categories) have been split into four terminal nodes: 4, 5, 6 and 7. The manner in which they have been split is meaningful for targetting decisions and this section will compare the characteristics of each.
fancyRpartPlot(tree3, digits = -3, sub = "", palettes = "Blues", type = 2)
parr0 %>%
group_by(rule3) %>%
summarise(parr_no_ben = n_distinct(pcode3[beneficiarios == 0]),
beneficiarios = sum(beneficiarios),
avg_beneficiarios = sum(beneficiarios) / n(),
not_reached = sum(not_reached),
avg_not_reached = sum(not_reached) / n(),
avg_org_count = mean(org_count),
avg_poblacion = mean(poblacion_2019),
percent_pobre = round(sum(pob_pobre) / sum(poblacion_2019), digits = 2),
percent_urbana = round(sum(pob_urbana) / sum(poblacion_2019), digits = 2),
densidad_ppl_km2 = sum(poblacion_2019) / sum(area_km2, na.rm = TRUE),
parroquias = n()) %>%
gather(key = variable, value = value, 2:ncol(.)) %>%
spread_(key = names(.)[1], value = 'value') %>%
# reordering the table instead of having it be alphabetical
arrange(factor(variable, levels = c("not_reached", "avg_not_reached", "avg_poblacion",
"beneficiarios",
"avg_beneficiarios", "avg_org_count",
"percent_pobre", "percent_urbana", "densidad_ppl_km2",
"parroquias", "parr_no_ben"))) %>%
pander(big.mark = ",")
| variable | 4 | 5 | 6 | 7 |
|---|---|---|---|---|
| not_reached | 1,091,079 | 6,371,849 | 3,648,710 | 522,263 |
| avg_not_reached | 8,659 | 12,898 | 9,653 | 5,223 |
| avg_poblacion | 46,126 | 35,806 | 19,120 | 7,174 |
| beneficiarios | 176,330 | 352,663 | 111,813 | 17,452 |
| avg_beneficiarios | 1,399 | 713.9 | 295.8 | 174.5 |
| avg_org_count | 2.508 | 1.666 | 0.9683 | 0.39 |
| percent_pobre | 0.22 | 0.38 | 0.52 | 0.75 |
| percent_urbana | 0.99 | 0.93 | 0.75 | 0.23 |
| densidad_ppl_km2 | 996.1 | 147.4 | 18.07 | 1.855 |
| parroquias | 126 | 494 | 378 | 100 |
| parr_no_ben | 38 | 211 | 205 | 76 |
4 consists of population centres which are easy to reach, but with only 21.8% of the population being poor, careful targetting and beneficiary selection is required – blanket coverage will only result in excessive inclusion errors. It also has the highest average number of organisations present per parrish (avg_org_count). There are 126 parrishes in this group. These parrishes should not be prioritised – resources should be allocated elsewhere.
5 is probably the best option for expansion for most partners – it has the highest concentration of poor persons not covered per parrish (nc_per_parr), is substantially poorer than 4, with a poverty incidence of 38%. Additionally, these parrishes are still very urbanised (92.5%), meaning that access to these populations will not be challenging. The coverage of organisations is still fairly high and partners should consider expanding into parrishes to the ones they currently cover. This is the largest group, with 494 parrishes.
6 is where access starts to get more challenging – though these parrishes have an average poverty incidence of 52%, the rate of urbanisation drops to 75% and the population density is only 18 ppl/km2. But there are still more poor persons not covered per parrish in this group than in 4. There are 378 parrishes in this group.
7 consists of the poorest, most vulnerable and most remote parrishes. Working in these areas will incur significant operational and logistical costs. However, with an average poverty incidence of 75.2%, blanket coverage will be warranted in many cases – if the challenge of reaching all of the population can be met. Additionally, they also have the lowest average number of poor persons not covered, given their extremely low population density of 1.8 ppl/km2. Humanitarian agencies have the lowest presence in these parrishes. It is advisable for donors to incentivise activities in these areas as it is clear that humanitarian agencies are avoiding them. There are 100 parrishes in this group.
# just one note for this map -- I still can't figure out how to get the tooltip to appear when
# you're hovering over the centroid instead of at the border; hoveron fill doesn't work.
# I think you should just ask stackoverflow GIS
# hex for Set2 "#66C2A5", "#FC8D62", "#8DA0CB", "#E78AC3", "#FFFFFF"
# hex for Dark2 "#1B9E77", "#D95F02", "#7570B3", "#E7298A", "#FFFFFF"
# hex for Accent "#7FC97F", "#BEAED4", "#FDC086", "#FFFF99", "#FFFFFF"
# scale_fill_viridis_d()
# scale_fill_manual(values = c())
parrmap_org <- parr %>%
left_join(parr0 %>%
select(pcode3, rule3), by = "pcode3") %>%
right_join(pcode3_shape, by = "pcode3") %>%
st_as_sf() %>%
mutate(not_reached = round(not_reached, digits = 0),
tree_node = rule3) %>%
mutate_at(vars(percent_pobre, percent_urbana), ~(round(., digits = 2))) %>%
ggplot() +
geom_sf(size = 0.1,
aes(fill = tree_node,
text = paste0(parroquia,",", "\n",
municipio, ",", "\n",
estado, "\n",
"not covered: ", not_reached, "\n",
"poverty incidence: ", percent_pobre, "\n",
"percent urban: ", percent_urbana, "\n",
"org present :", org_present),
alpha = org_present)) +
theme_void() +
scale_fill_manual(values = c("#66C2A5", "#FC8D62", "#8DA0CB", "#E78AC3")) +
scale_alpha_discrete(range = c(1, 0.7)) +
theme(legend.title = element_text(size = 7),
legend.text = element_text(size = 7),
plot.title = element_text(size = 11)) +
guides(alpha = FALSE) +
labs(fill = "Tree node",
alpha = "") +
ggtitle('Map of parrishes by decision tree node (colour) & if organisations present (alpha)')
ggplotly(parrmap_org, tooltip = c("text", "fill")) %>%
layout(title = list(text = paste0(
"Map of parrishes by decision tree node (colour) & if organisations present (alpha)",
"<br>",
"<sup>",
"mouse over for details; drag and click to select and zoom; double-click legend select/deselect","</sup>")))
Above is a map of parrishes by their decision tree node (denoted by colour), we have also decreased the alpha for parrishes where there are already organisations present, meaning that they appear more transparent. Looking at 4 and 5, we can see the parrishes that conform to the the Venezuela Costal Range and the Venezuelan Andes, where most of the country’s population is concentrated; as a reminder, parrishes in node 5 are excellent candidates for expansion.
We also see three large clusters of parrishes from node 7 – the poorest and most-sparsely populated areas – in Amazonas and Bolivar (at the bottom of the map), in Delta Amacuro (at the extreme right) and in Lara and Falcon (top-left).
# the main problem I see is that each of the leaves has little variance in terms of poverty incidence
# but let me know if you want maps or products focused on this tree, it's pretty easy to do. [15] is very, very attractive. Maybe I can
# 6 is dense, urban and highest operational presence,
# 2 is just too big. 800 parrishes is just too many. The low end is distinguished much better in tree3
# 14 is just rich, urban and not a priority. It's also a really small leaf.
# 15 is actually a really good leaf -- really high nc_per_parr, few parrishes, very dense, very urban and 42% poor and such an immensely low coverage percent. Good low-hanging fruit. I almost want to keep tree1 just because of this leaf. Maybe I will make one map just for this. 59,508 nc_per_parr is massive.
parr0 %>%
group_by(rule1) %>%
summarise(parr_no_ben = n_distinct(pcode3[beneficiarios == 0]),
beneficiarios = sum(beneficiarios),
ben_per_parr = sum(beneficiarios) / n(),
not_reached = sum(not_reached),
nr_per_parr = sum(not_reached) / n(),
nc_per_mun = sum(not_reached) / n_distinct(pcode2),
avg_org_count = mean(org_count),
coverage_percent = sum(beneficiarios) / sum(poblacion_2019),
percent_pobre = sum(pob_pobre) / sum(poblacion_2019),
percent_urbana = sum(pob_urbana) / sum(poblacion_2019),
densidad_ppl_km2 = sum(poblacion_2019) / sum(area_km2, na.rm = TRUE),
parroquias = n(),
municipios = n_distinct(pcode2),
parr_per_mun = n() / n_distinct(pcode2)) %>%
gather(key = variable, value = value, 2:ncol(.)) %>%
spread_(key = names(.)[1], value = 'value') %>%
arrange(factor(variable, levels = c("not_reached", "nr_per_parr", "nc_per_mun", "beneficiarios",
"ben_per_parr", "avg_org_count", "coverage_percent",
"percent_pobre", "percent_urbana", "densidad_ppl_km2",
"parroquias", "parr_no_ben", "municipios",
"parr_per_mun"))) %>% pander(big.mark = ",")
type or use slider to filter by categories or values
parr0 %>%
select(-percent_total_ben) %>%
mutate(sector_count = rowSums(select(., ends_with("_ben"))!=0)) %>%
mutate(educacion_only = ifelse(educacion_ben > 0, "educacion", ""),
nutricion_only = ifelse(nutricion_ben > 0, "nutricion", ""),
salud_only = ifelse(salud_ben > 0, "salud", ""),
wash_only = ifelse(wash_ben > 0, "wash", ""),
proteccion_only = ifelse(proteccion_ben > 0, "proteccion", ""),
sectors_present = str_trim(paste0(educacion_only, " ", nutricion_only, " ", proteccion_only, " ",
salud_only, " ", wash_only))) %>%
mutate_at(vars(pob_pobre, not_reached, org_count), ~(round(., digits = 0))) %>%
mutate_at(vars(percent_pobre, percent_urbana), ~(round(., digits = 2))) %>%
select(estado, municipio, parroquia,
percent_pobre, percent_urbana, not_reached, beneficiarios, tree_node = rule3,
org_count, sector_count, sectors_present, pcode3) %>%
arrange(desc(not_reached)) %>%
# the js is adjusting the font size for the whole container -- there doesn't seem to be another way
datatable(filter = "top", options = list(pageLength = 10, scrollX = TRUE,
initComplete = htmlwidgets::JS(
"function(settings, json) {",
paste0("$(this.api().table().container()).css({'font-size': '", "8.5pt", "'});"),
"}")
)
)